C/C++ Users Group Library 1996 July

home *** CD-ROM | disk | FTP | other *** search

/ C/C++ Users Group Library 1996 July / C-C++ Users Group Library July 1996.iso / vol_400 / 424_01 / ed_157 / match_search.c < prev next >

Wrap

C/C++ Source or Header | 1993-09-11 | 20.0 KB | 670 lines

/*
 * Copyright (C) 1992 by Rush Record (rhr@clio.rice.edu)
 *
 * This file is part of ED.
 *
 * ED is free software; you can redistribute it and/or modify it under the terms
 * of the GNU General Public License as published by the Free Software Foundation.
 *
 * ED is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with ED
 * (see the file COPYING).  If not, write to the Free Software Foundation, 675
 * Mass Ave, Cambridge, MA 02139, USA.
 */
#include "opsys.h"

#include <stdio.h>
#include <string.h>
#include "memory.h"

#include "ctyp_dec.h"
#include "rec.h"
#include "window.h"
#include "ed_dec.h"
#include "buffer.h"
#include "buf_dec.h"
#include "cmd_enum.h"
#include "regex.h"
#include "handy.h"

#define MAX_REGEX 64	/* maximum number of regexps that may be extracted from user's search string */

/* text appended to the offending pattern when a regular expression is rejected */
#define REGEX_ERR_SUFFIX "' is not a valid regular expression."

/* identity translation table: every byte maps to itself */
static Uchar normal[256] =
{
	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
	0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
	0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
	0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
	0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
	0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
	0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
	0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
	0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};
static Uchar table[257];	/* this is for user-specified search tables */

/* search-mode flags copied from the caller by match_init */
static Char general,beginning,wild,tabledriven,regexp,wordmode;
/* translation tables: tbl1 is applied to search-string bytes, tbl2 to file bytes */
static Uchar *tbl1,*tbl2;
/* compiled patterns: pbuf for character mode, wbuf[] one per word in word mode */
static struct re_pattern_buffer pbuf,wbuf[MAX_REGEX];
/* startrec/endrec record whether a pattern carried a leading ^ / trailing $ anchor */
static Char startrec,startrecs[MAX_REGEX],endrec,endrecs[MAX_REGEX],must_recompile_regex;
static Char *delim = NULL;	/* word-delimiter table */
static Int nwords = 0;		/* number of valid entries in wbuf/startrecs/endrecs */

extern Char *wordtable();

/******************************************************************************\
|Routine: match_init
|Callby: edit find_string word_fill
|Purpose: Initializes local variables for match_search.
|Arguments:
|    flags is the set of flags that constrain the search.  They are:
|          [0] = g means case-insensitive, else (e) case-sensitive.
|          [1] = b means match position is at beginning of string, else (e) end.
|          [2] = w means WILDCARD is wildcard, else (n) not.
|          [3] = n means no search table, else (t) table in effect.
|          [4] = r means use regular expression searching, n means don't.
|                Overrides the rest of the flags, except beginning/end.
|          [5] = c means search-by-character (the default), else (w) search-by-word.
|Notes: Selecting regexp mode marks the compiled patterns stale, so the next
|       match_search call recompiles them from the search buffer.
\******************************************************************************/
void match_init(flags)
Char flags[6];
{
	general = flags[((int)SMODE_GENERAL) >> 1];
	beginning = flags[((int)SMODE_BEGINNING) >> 1];
	wild = flags[((int)SMODE_WILDCARDS) >> 1];
	tabledriven = flags[((int)SMODE_TABLE_DRIVEN) >> 1];
	if((regexp = flags[((int)SMODE_REGEXP) >> 1]) == 'r')
		must_recompile_regex = 1;
	wordmode = (flags[((int)SMODE_CHAR) >> 1] == 'w');
	/* a user table applies to file bytes only; otherwise both sides share one table */
	tbl1 = (tabledriven == 't')? normal : (general == 'g')? _my_toupper_ : normal;
	tbl2 = (tabledriven == 't')? table : (general == 'g')? _my_toupper_ : normal;
	delim = wordtable();
}

/******************************************************************************\
|Routine: parse_regexp
|Callby: match_search
|Purpose: Preprocesses a regexp so it works right, and compiles it.  A leading
|         ^ and a trailing $ are stripped from the expression and reported
|         through startrec/endrec; ^^ and $$ are rewritten to \^ and \$.
|Arguments:
|    pbuf is the pattern buffer that receives the compiled expression.
|    buf is the NUL-terminated expression; it is modified in place.
|    startrec is set to 1 if the expression began with ^, else 0.
|    endrec is set to 1 if the expression ended with $, else 0.
\******************************************************************************/
void parse_regexp(pbuf,buf,startrec,endrec)
struct re_pattern_buffer *pbuf;
Char *buf;
Char *startrec,*endrec;
{
	Int i;

	if(buf[0] == '^')	/* check for ^... */
	{
		*startrec = 1;
		if(buf[1] == '^')	/* change ^^... to \^... */
			buf[0] = '\\';
		else	/* slide exp to left, terminator included */
			memmove(buf,buf + 1,strlen(buf));
	}
	else
		*startrec = 0;
	i = (Int)strlen(buf) - 1;
	if(i >= 0 && buf[i] == '$')	/* check for ...$ (the expression may be empty by now) */
	{
		*endrec = 1;
		if(i > 1 && buf[i - 1] == '$')	/* change ...$$ to ...\$ */
			buf[i - 1] = '\\';
		else
			buf[i] = '\0';	/* shorten exp */
	}
	else
		*endrec = 0;
	re_compile_pattern(buf,strlen(buf),pbuf);	/* let the re_match call report an erroneous RE */
}

/******************************************************************************\
|Routine: only_delims
|Callby: match_search
|Purpose: Reports whether a search record has no word characters in it.
|         Returns 1 if the record is empty or holds only delimiters, else 0.
|Arguments:
|    s is the search-buffer record to examine.
\******************************************************************************/
static Int only_delims(s)
rec_ptr s;
{
	Uchar *c;
	Int n;

	for(c = (Uchar *)s->data,n = s->length;n > 0;n--,c++)
		if(!delim[*c])
			return(0);
	return(1);
}

/******************************************************************************\
|Routine: match_search
|Callby: edit find_string inquire word_fill
|Purpose: Determines whether the current position is on a matching string.
|         Returns the length of the matching string, or 0 for no match.  In
|         SET SEARCH END mode the non-binary paths count the length downward,
|         so a match is reported as a negative value.  The binary-mode paths
|         return 1 on success.
|Arguments:
|    currec is the record being compared to the search string.
|    curbyt is the position in the record where the comparison starts.
|    searchbuf is the buffer containing the search string.
\******************************************************************************/
Int match_search(currec,curbyt,searchbuf)
rec_ptr currec;
Int curbyt;
buf_ptr searchbuf;
{
	register Uchar *cr,*cs,*t1,*t2,*p,*q;
	register Int c1,c2;
	rec_ptr r,s,tr,ts;
	Int lr,ls,ss;
	Int offset;
	Char rbuf[1100],*pp,*qq;
	Int i,j,n;
	Char buf[1024];

	if((s = searchbuf->first) == (rec_ptr)&searchbuf->first)	/* empty search buffer */
		return(0);
	if(wordmode && searchbuf->nrecs > 1)	/* word mode handles single-record search strings only */
		return(0);
	if(must_recompile_regex)
	{
		must_recompile_regex = 0;
		if(wordmode)
		{
			/* compile one pattern per blank-separated word; the copy is clamped to buf */
			nwords = 0;
			n = (Int)strlen(s->data);
			if(n > (Int)sizeof(buf) - 2)
				n = (Int)sizeof(buf) - 2;
			memcpy(buf,s->data,n);
			buf[n] = ' ';	/* trailing blank terminates the last word */
			buf[n + 1] = '\0';
			for(pp = buf;(qq = strchr(pp,' '));pp = ++qq)
			{
				memcpy(rbuf,pp,(i = qq - pp));
				rbuf[i] = '\0';
				parse_regexp(wbuf + nwords,rbuf,startrecs + nwords,endrecs + nwords);
				if(++nwords >= MAX_REGEX)	/* wbuf is full, ignore any further words */
					break;
			}
		}
		else
		{
			n = (Int)strlen(searchbuf->first->data);
			if(n > (Int)sizeof(buf) - 1)
				n = (Int)sizeof(buf) - 1;
			memcpy(buf,searchbuf->first->data,n);
			buf[n] = '\0';
			parse_regexp(&pbuf,buf,&startrec,&endrec);
		}
	}
	r = currec;
	cr = (Uchar *)&r->data[curbyt];
	offset = 0;
	t1 = tbl1;
	t2 = tbl2;
	if(beginning == 'b')
	{
		lr = r->length - curbyt;	/* bytes remaining in the file record */
		if(WINDOW[CURWINDOW].binary)	/* binary-mode search ignores linebreaks, has special handling for <0d> in search buffer */
		{
			if(curbyt == r->length)	/* disallow matches at eol in begin mode */
				return(0);
			cs = (Uchar *)s->data;	/* start at beginning of search buffer's first record */
			ls = s->length;
			while(1)
			{
				if(r == BASE)	/* hit end of file */
					return(0);
				/* insure we have another search buffer byte */
				while(!ls)	/* this record in search buffer ends, advance */
				{
					/* NOTE(review): success yields 1 here, not the accumulated offset - confirm callers expect that */
					if((s = s->next) == (rec_ptr)&searchbuf->first)	/* end of search buffer, succeed */
						return(1);
					while(!lr)	/* insure another file byte is available */
					{
						if((r = r->next) == BASE)
							return(0);
						lr = r->length;
						cr = (Uchar *)r->data;
					}
					if(*(t2 + *cr++) != 13)	/* require a CR to match the search buffer linebreak */
						return(0);
					lr--;
					offset++;	/* advance for CR */
					cs = (Uchar *)s->data;	/* new pointers */
					ls = s->length;
				}
				/* insure we have another file byte */
				while(!lr)	/* ran out on file record, advance */
				{
					if((r = r->next) == BASE)
						return(0);
					lr = r->length;
					cr = (Uchar *)r->data;
				}
				if(wild == 'n')
				{
					if(*(t1 + *cs++) != *(t2 + *cr++))	/* compare */
						return(0);
				}
				else
				{
					if(*cs == WILDCARD)	/* wildcards match anything */
					{
						cs++;
						cr++;
					}
					else if(*(t1 + *cs++) != *(t2 + *cr++))	/* compare */
						return(0);
				}
				ls--;
				lr--;
				offset++;	/* advance offset */
			}
		}
		if(regexp == 'r' && !wordmode)
		{
			if(startrec && curbyt)	/* ignore attempts not at beginning of record when ^... present */
				return(0);
			if((i = re_match(&pbuf,(const char *)cr,lr,0,NULL)) == -2)
				goto regex_error;
			if(i < 0)
				return(0);
			if(endrec && (curbyt + i != r->length))	/* require match at end of record when ...$ present */
				return(0);
			return(i);	/* note that re_match may return 0 as a success code.  hmmm. */
		}
		if(wild == 'n' || regexp == 'r')	/* no wild cards, or wordmode regex processing */
		{
			/*
			 * word mode (beginning):
			 *   0) find a word in s, else succeed.
			 *   1) find a word in r, else fail.
			 *   2) if words don't compare equal, fail.
			 *   3) repeat from 0).
			 */
			if(wordmode)
			{
				if(r == BASE)
					return(0);
				if(regexp != 'r' && only_delims(s))	/* search string contains only delims, or is empty */
					return(0);
				if(!on_a_word(r,curbyt))	/* we can only succeed when on a word */
					return(0);
				ss = 0;	/* this is the current offset in the search string */
				tr = r;	/* this is a temporary rec_ptr for traversing the file */
				ts = s;	/* this is a temporary rec_ptr for traversing the search string */
				if(regexp == 'r')
				{
					if(!nwords)
						return(0);
					for(j = 0;j < nwords;j++)
					{
						if(startrecs[j] && curbyt)	/* ignore attempts not at beginning of record when ^... present */
							return(0);
						if(!get_word_start(BASE,&tr,&curbyt,&lr,&offset))
							return(0);
						if((i = re_match(wbuf + j,(const char *)tr->data + curbyt,lr,0,NULL)) == -2)
							goto regex_error;
						if(i < 0)
							return(0);
						/* NOTE(review): tests r->length although the word lives in tr - confirm intent */
						if(endrecs[j] && (curbyt + i != r->length))	/* require match at end of record when ...$ present */
							return(0);
						curbyt += lr;	/* advance to past end of word in file */
					}
				}
				else
				{
					while(1)
					{
						if(!get_word_start(s,&ts,&ss,&ls,NULL))
							break;	/* we have a hit */
						if(!get_word_start(BASE,&tr,&curbyt,&lr,&offset))
							return(0);
						if(ls != lr)	/* word length mismatch */
							return(0);
						for(p = (Uchar *)ts->data + ss,q = (Uchar *)tr->data + curbyt,i = ls;i--;p++,q++)	/* compare the two words */
							if(*(t1 + *p) != *(t2 + *q))
								return(0);
						curbyt += lr;	/* advance to past end of word in file */
						ss += ls;	/* advance to past end of word in search string */
					}
				}
			}
			else	/* not wordmode */
			{
				do
				{
					if(r == BASE)
						return(0);
					if((ls = s->length) > lr)
						return(0);
					for(cs = (Uchar *)s->data;ls--;)
					{
						if(*(t1 + *cs++) != *(t2 + *cr++))
							return(0);
						offset++;
					}
					if((s = s->next) != (rec_ptr)&searchbuf->first)	/* insure the file record ends here too */
					{
						if(cr != (Uchar *)&r->data[r->length])	/* search record ends, file record continues */
							return(0);
						r = r->next;
						lr = r->length;
						cr = (Uchar *)r->data;
						offset++;	/* count the linebreak */
					}
				} while(s != (rec_ptr)&searchbuf->first);
			}
		}
		else	/* WILDCARD wild cards allowed */
		{
			if(wordmode)
			{
				if(only_delims(s))	/* search string contains only delims, or is empty */
					return(0);
				if(r == BASE)
					return(0);
				if(!on_a_word(r,curbyt))	/* we can only succeed when on a word */
					return(0);
				ss = 0;	/* this is the current offset in the search string */
				tr = r;	/* this is a temporary rec_ptr for traversing the file */
				ts = s;	/* this is a temporary rec_ptr for traversing the search string */
				while(1)
				{
					if(!get_word_start(s,&ts,&ss,&ls,NULL))
						break;	/* we have a hit */
					if(!get_word_start(BASE,&tr,&curbyt,&lr,&offset))
						return(0);
					if(ls != lr)	/* word length mismatch */
						return(0);
					for(p = (Uchar *)ts->data + ss,q = (Uchar *)tr->data + curbyt,i = ls;i--;p++,q++)	/* compare the two words */
						if(*p != WILDCARD && *(t1 + *p) != *(t2 + *q))
							return(0);
					curbyt += lr;	/* advance to past end of word in file */
					ss += ls;	/* advance to past end of word in search string */
				}
			}
			else
			{
				do
				{
					if(r == BASE)
						return(0);
					if((ls = s->length) > lr)
						return(0);
					for(cs = (Uchar *)s->data;ls--;)
					{
						c1 = *cs++;
						c2 = *cr++;
						if(c1 != WILDCARD && *(t1 + c1) != *(t2 + c2))
							return(0);
						offset++;
					}
					if((s = s->next) != (rec_ptr)&searchbuf->first)	/* insure the file record ends here too */
					{
						if(cr != (Uchar *)&r->data[r->length])	/* search record ends, file record continues */
							return(0);
						r = r->next;
						lr = r->length;
						cr = (Uchar *)r->data;
						offset++;	/* count the linebreak */
					}
				} while(s != (rec_ptr)&searchbuf->first);
			}
		}
	}
	else	/* we are SET SEARCH END */
	{
		lr = curbyt;	/* bytes available to the left of the current position */
		if(regexp == 'r' && !wordmode)
		{
			if(endrec && (curbyt != r->length))	/* ignore attempts not at end of record when ...$ is present */
				return(0);
			/* try ever-shorter spans ending at curbyt until one is matched in its entirety */
			while(lr > 0)
			{
				if((i = re_match(&pbuf,(const char *)(r->data + curbyt - lr),lr,0,NULL)) == -2)
					goto regex_error;
				if(i >= 0 && lr == i)
				{
					if(startrec && (i != curbyt))	/* require match at beginning of record when ^... is present */
						return(0);
					return(-lr);
				}
				lr--;
			}
			return(0);
		}
		s = searchbuf->last;
		if(WINDOW[CURWINDOW].binary)	/* binary-mode search ignores linebreaks, has special handling for <0d> in search buffer */
		{
			if(!curbyt)	/* disallow bol matches in end mode */
				return(0);
			ls = s->length;
			cs = (Uchar *)s->data + ls;	/* start at the end of search buffer's last record */
			while(1)
			{
				if(r == BASE)	/* hit end of file */
					return(0);
				/* insure we have another search buffer byte */
				while(!ls)	/* hit beginning of this record in search buffer, advance */
				{
					/* NOTE(review): success yields 1 here, not the accumulated offset - confirm callers expect that */
					if((s = s->prev) == (rec_ptr)&searchbuf->first)	/* hit beginning of search buffer, succeed */
						return(1);
					while(!lr)	/* insure another file byte is available */
					{
						if((r = r->prev) == BASE)
							return(0);
						lr = r->length;
						cr = (Uchar *)r->data + lr;
					}
					if(*(t2 + *--cr) != 13)	/* require a CR to match the search buffer linebreak */
						return(0);
					lr--;
					offset++;	/* advance for CR */
					ls = s->length;
					cs = (Uchar *)s->data + ls;	/* new pointers */
				}
				/* insure we have another file byte */
				while(!lr)	/* ran out on file record, advance */
				{
					if((r = r->prev) == BASE)
						return(0);
					lr = r->length;
					cr = (Uchar *)r->data + lr;
				}
				if(wild == 'n')
				{
					if(*(t1 + *--cs) != *(t2 + *--cr))	/* compare */
						return(0);
				}
				else
				{
					if(*--cs == WILDCARD)	/* wildcards match anything */
						cr--;
					else if(*(t1 + *cs) != *(t2 + *--cr))	/* compare */
						return(0);
				}
				ls--;
				lr--;
				offset++;	/* advance offset */
			}
		}
		if(wild == 'n' || regexp == 'r')	/* no wild cards, or in regex mode */
		{
			if(wordmode)
			{
				if(regexp != 'r' && only_delims(s))	/* search string contains only delims, or is empty */
					return(0);
				if(r == BASE)
					return(0);
				if(!on_a_wordend(r,curbyt))	/* we can only succeed when on a wordend */
					return(0);
				ss = strlen(s->data);	/* this is the current offset in the search string */
				tr = r;	/* this is a temporary rec_ptr for traversing the file */
				ts = s;	/* this is a temporary rec_ptr for traversing the search string */
				if(regexp == 'r')
				{
					if(!nwords)
						return(0);
					for(j = nwords - 1;j >= 0;j--)	/* last word first, walking backward through the file */
					{
						if(startrecs[j] && curbyt)	/* ignore attempts not at beginning of record when ^... present */
							return(0);
						if(!get_word_end(BASE,&tr,&curbyt,&lr,&offset))
							return(0);
						if((i = re_match(wbuf + j,(const char *)tr->data + curbyt,lr,0,NULL)) == -2)
							goto regex_error;
						if(i < 0)
							return(0);
						/* NOTE(review): tests r->length although the word lives in tr - confirm intent */
						if(endrecs[j] && (curbyt + i != r->length))	/* require match at end of record when ...$ present */
							return(0);
					}
				}
				else
				{
					while(1)
					{
						if(!get_word_end(s,&ts,&ss,&ls,NULL))
							break;	/* we have a hit */
						if(!get_word_end(BASE,&tr,&curbyt,&lr,&offset))
							return(0);
						if(ls != lr)	/* word length mismatch */
							return(0);
						for(p = (Uchar *)ts->data + ss,q = (Uchar *)tr->data + curbyt,i = ls;i--;p++,q++)	/* compare the two words */
							if(*(t1 + *p) != *(t2 + *q))
								return(0);
					}
				}
			}
			else	/* not wordmode */
			{
				do
				{
					if(r == BASE)
						return(0);
					if((ls = s->length) > lr)
						return(0);
					for(cs = (Uchar *)&s->data[ls];ls--;)
					{
						if(*(t1 + *--cs) != *(t2 + *--cr))
							return(0);
						offset--;
					}
					if((s = s->prev) != (rec_ptr)&searchbuf->first)	/* insure the file record begins here too */
					{
						if(cr != (Uchar *)r->data)	/* search record ends, file record continues */
							return(0);
						r = r->prev;
						lr = r->length;
						cr = (Uchar *)&r->data[lr];
						offset--;	/* count the linebreak */
					}
				} while(s != (rec_ptr)&searchbuf->first);
			}
		}
		else	/* WILDCARD wild cards allowed */
		{
			if(wordmode)
			{
				if(only_delims(s))	/* search string contains only delims, or is empty */
					return(0);
				if(r == BASE)
					return(0);
				if(!on_a_wordend(r,curbyt))	/* we can only succeed when on a wordend */
					return(0);
				ss = strlen(s->data);	/* this is the current offset in the search string */
				tr = r;	/* this is a temporary rec_ptr for traversing the file */
				ts = s;	/* this is a temporary rec_ptr for traversing the search string */
				while(1)
				{
					if(!get_word_end(s,&ts,&ss,&ls,NULL))
						break;	/* we have a hit */
					if(!get_word_end(BASE,&tr,&curbyt,&lr,&offset))
						return(0);
					if(ls != lr)	/* word length mismatch */
						return(0);
					for(p = (Uchar *)ts->data + ss,q = (Uchar *)tr->data + curbyt,i = ls;i--;p++,q++)	/* compare the two words */
						if(*p != WILDCARD && *(t1 + *p) != *(t2 + *q))
							return(0);
				}
			}
			else
			{
				do
				{
					if(r == BASE)
						return(0);
					if((ls = s->length) > lr)
						return(0);
					for(cs = (Uchar *)&s->data[ls];ls--;)
					{
						c1 = *--cs;
						c2 = *--cr;
						if(c1 != WILDCARD && *(t1 + c1) != *(t2 + c2))
							return(0);
						offset--;
					}
					if((s = s->prev) != (rec_ptr)&searchbuf->first)	/* insure the file record begins here too */
					{
						if(cr != (Uchar *)r->data)	/* search record ends, file record continues */
							return(0);
						r = r->prev;
						lr = r->length;
						cr = (Uchar *)&r->data[lr];
						offset--;	/* count the linebreak */
					}
				} while(s != (rec_ptr)&searchbuf->first);
			}
		}
	}
	return(offset);

regex_error:
	/* build "'<pattern>' is not a valid regular expression.", clamping the pattern to fit rbuf */
	n = SEARCHBUF.first->length;
	if(n > (Int)(sizeof(rbuf) - sizeof(REGEX_ERR_SUFFIX) - 1))
		n = (Int)(sizeof(rbuf) - sizeof(REGEX_ERR_SUFFIX) - 1);
	strcpy(rbuf,"'");
	memcpy(rbuf + 1,SEARCHBUF.first->data,n);
	strcpy(rbuf + n + 1,REGEX_ERR_SUFFIX);
	slip_message(rbuf);
	wait_message();
	return(0);
}

/******************************************************************************\
|Routine: set_search_table
|Callby: restore_par set_param
|Purpose: Sets up a search table.  Returns 1 on success, 0 if the file cannot
|         be opened (the table is left as the identity mapping in that case).
|Arguments:
|    file is the name of the search table, to be read in from disk file.  Each
|         line supplies one mapping: its first byte is translated to its
|         second byte.
\******************************************************************************/
Int set_search_table(file)
Char *file;
{
	FILE *fp;
	Int l;
	Uchar buf[512];

	strip_quotes(file);
	envir_subs(file);
	for(l = 0;l < 256;l++)	/* start from the identity mapping */
		table[l] = l;
	if(!(fp = fopen(file,"r")))
		return(0);
	while(fgets((Char *)buf,sizeof(buf),fp))
		table[buf[0]] = buf[1];
	fclose(fp);
	return(1);
}